# -*- coding:utf-8 -*-
import os
from pathlib import Path
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import re

class BuddhistTextCleaner:
    """Tkinter tool that removes repeated text blocks from ``.txt`` files.

    The user picks a folder; every ``*.txt`` file directly inside it is read
    as UTF-8, split into blocks on blank lines, and written to a ``cleaned``
    sub-folder (as ``cleaned_<name>``) with later repeats of a block removed.
    """

    def __init__(self):
        """Create the main window and build its widgets.

        Any exception raised during setup is printed and re-raised.
        """
        try:
            self.window = tk.Tk()
            self.window.title("佛经翻译重复内容清理工具")
            self.window.geometry("900x700")
            self.create_widgets()
        except Exception as e:
            print(f"初始化错误: {str(e)}")
            raise

    def create_widgets(self):
        """Build the control row, the progress bar and the scrollable log.

        Stores ``path_label``, ``start_button``, ``progress`` and
        ``log_text`` on the instance. Exceptions are printed and re-raised.
        """
        try:
            control_frame = ttk.Frame(self.window)
            control_frame.pack(fill='x', padx=10, pady=5)

            tk.Button(control_frame,
                      text="选择需要处理的文件夹",
                      command=self.select_folder).pack(side='left', padx=5)

            self.path_label = tk.Label(control_frame, text="未选择文件夹")
            self.path_label.pack(side='left', padx=5)

            # Keep a handle on the start button so process_files() can
            # disable it while a run is in progress.
            self.start_button = tk.Button(control_frame,
                                          text="开始处理",
                                          command=self.process_files)
            self.start_button.pack(side='right', padx=5)

            self.progress = ttk.Progressbar(self.window, length=800, mode='determinate')
            self.progress.pack(pady=10, padx=10)

            log_frame = ttk.Frame(self.window)
            log_frame.pack(fill='both', expand=True, padx=10, pady=5)

            scrollbar = ttk.Scrollbar(log_frame)
            scrollbar.pack(side='right', fill='y')

            self.log_text = tk.Text(log_frame, height=30, width=90, yscrollcommand=scrollbar.set)
            self.log_text.pack(side='left', fill='both', expand=True)
            scrollbar.config(command=self.log_text.yview)

        except Exception as e:
            print(f"创建界面元素错误: {str(e)}")
            raise

    def select_folder(self):
        """Ask the user for a directory and remember it as ``folder_path``.

        Nothing changes when the dialog is cancelled. Errors are written to
        the log instead of being raised.
        """
        try:
            folder_path = filedialog.askdirectory()
            if folder_path:
                self.folder_path = folder_path
                self.path_label.config(text=f"已选择: {folder_path}")
                self.log_message(f"选择文件夹: {folder_path}")
        except Exception as e:
            self.log_message(f"选择文件夹错误: {str(e)}")

    def log_message(self, message):
        """Append ``message`` plus a newline to the log and scroll to the end.

        The window is refreshed so the line shows up during long-running
        work. Failures are printed to stdout rather than raised.
        """
        try:
            self.log_text.insert(tk.END, f"{message}\n")
            self.log_text.see(tk.END)
            self.window.update()
        except Exception as e:
            print(f"日志记录错误: {str(e)}")

    def find_duplicates(self, text_blocks):
        """Return the indices of blocks that repeat an earlier block.

        Blocks are compared after stripping surrounding whitespace. Blocks
        that are empty after stripping are never reported. The first
        occurrence of a block is kept; every later occurrence is reported,
        in ascending index order.
        """
        seen = set()
        duplicates = []
        for index, block in enumerate(text_blocks):
            key = block.strip()
            if not key:
                # Whitespace-only blocks are never treated as duplicates.
                continue
            if key in seen:
                duplicates.append(index)
            else:
                seen.add(key)
        return duplicates

    def process_file(self, file_path):
        """Write a de-duplicated copy of one text file.

        ``file_path`` may be a ``str`` or a ``pathlib.Path``. The file is
        read as UTF-8 and split into blocks on blank lines. The result goes
        to ``<parent>/cleaned/cleaned_<name>``: the remaining blocks joined
        by a blank line when duplicates were found, otherwise the original
        content unchanged. Every error is caught and written to the log.
        """
        try:
            # Normalise first so ``.name`` / ``.parent`` work for str input too.
            source = Path(file_path)

            with open(source, 'r', encoding='utf-8') as f:
                content = f.read()

            # Split into blocks on blank (or whitespace-only) lines.
            blocks = re.split(r'\n\s*\n', content)
            duplicate_indices = self.find_duplicates(blocks)

            if duplicate_indices:
                # Set membership keeps the filter linear in the block count.
                dropped = set(duplicate_indices)
                output_text = '\n\n'.join(
                    block for i, block in enumerate(blocks) if i not in dropped
                )
            else:
                output_text = content

            output_dir = source.parent / "cleaned"
            output_dir.mkdir(exist_ok=True)
            output_file = output_dir / f"cleaned_{source.name}"
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(output_text)

            if duplicate_indices:
                self.log_message(f"处理文件: {source.name}")
                self.log_message(f"删除了 {len(duplicate_indices)} 个重复文本块")
            else:
                self.log_message(f"文件 {source.name} 未发现需要清理的重复内容")

        except Exception as e:
            self.log_message(f"处理文件错误 {file_path}: {str(e)}")

    def process_files(self):
        """Run ``process_file`` on every ``*.txt`` file in the chosen folder.

        Shows an error dialog when no folder has been selected and logs a
        message when the folder has no ``.txt`` files. Files are handled in
        sorted order. The start button is disabled for the duration of the
        run, because refreshing the window while working would otherwise let
        a second click start a nested run.
        """
        try:
            if not hasattr(self, 'folder_path'):
                messagebox.showerror("错误", "请先选择文件夹！")
                return

            folder = Path(self.folder_path)
            # Sorted so the processing order and the log are deterministic.
            files = sorted(folder.glob("*.txt"))

            if not files:
                self.log_message("未找到.txt文件！")
                return

            total = len(files)
            self.start_button.config(state='disabled')
            try:
                self.progress['value'] = 0
                self.log_message(f"\n开始处理 {total} 个文件...")

                for i, file_path in enumerate(files, 1):
                    self.log_message(f"\n处理第 {i}/{total} 个文件: {file_path.name}")
                    self.process_file(file_path)

                    # Advance only after the file is done, so 100% means finished.
                    self.progress['value'] = i / total * 100
                    self.window.update()

                self.log_message("\n全部处理完成！")
                messagebox.showinfo("完成", "文件处理完成！")
            finally:
                try:
                    self.start_button.config(state='normal')
                except tk.TclError:
                    # The window was closed mid-run; nothing left to re-enable.
                    pass

        except Exception as e:
            self.log_message(f"处理文件夹错误: {str(e)}")
            messagebox.showerror("错误", f"处理过程中发生错误: {str(e)}")

    def run(self):
        """Enter the Tk main loop; an exception escaping it is printed."""
        try:
            self.window.mainloop()
        except Exception as e:
            print(f"运行错误: {str(e)}")

if __name__ == "__main__":
    # Script entry point: build the GUI and hand control to its main loop.
    # A failure while starting or running is reported on stdout.
    try:
        BuddhistTextCleaner().run()
    except Exception as exc:
        print(f"程序启动错误: {exc}")
